import plotly.offline as pyo
from plotly.graph_objs import *
import chart_studio.plotly as py
import pandas as pd
from pandas import DataFrame
# Switch Plotly to inline notebook rendering before any figure is drawn.
pyo.init_notebook_mode()

# Meteorite landings per weight band; the first CSV column is used as the
# row index.
sizes = pd.read_csv(
    r"../Data/MeteoriteLandingsbyWeightPerYear.csv",
    index_col=0,
)
sizes.head()
| | 101 - 500g | 501 - 1000g | less than 101g | more than 1000g |
|---|---|---|---|---|
| year | | | | |
| 2000 | 522 | 137 | 888 | 232 |
| 2001 | 503 | 144 | 818 | 185 |
| 2002 | 497 | 123 | 1249 | 203 |
| 2003 | 555 | 134 | 2425 | 208 |
| 2004 | 394 | 119 | 1289 | 136 |
# Snapshot the weight-band column labels now, before any derived columns
# are added to the frame.
sizeStrings = list(sizes.columns)
sizeStrings
['101 - 500g', '501 - 1000g', 'less than 101g', 'more than 1000g']
# Row total over the weight-band columns only. Summing the whole frame would
# also add 'total' and the '_pc' columns whenever this cell is re-run.
sizes['total'] = sizes[sizeStrings].sum(axis=1)

# Add one '<band>_pc' column per weight band: that band's count divided by
# the row total.
for s in sizeStrings:
    sizes["{}_pc".format(s)] = sizes[s] / sizes['total']

sizes.head()
| | 101 - 500g | 501 - 1000g | less than 101g | more than 1000g | total | 101 - 500g_pc | 501 - 1000g_pc | less than 101g_pc | more than 1000g_pc |
|---|---|---|---|---|---|---|---|---|---|
| year | | | | | | | | | |
| 2000 | 522 | 137 | 888 | 232 | 1779 | 0.293423 | 0.077010 | 0.499157 | 0.130410 |
| 2001 | 503 | 144 | 818 | 185 | 1650 | 0.304848 | 0.087273 | 0.495758 | 0.112121 |
| 2002 | 497 | 123 | 1249 | 203 | 2072 | 0.239865 | 0.059363 | 0.602799 | 0.097973 |
| 2003 | 555 | 134 | 2425 | 208 | 3322 | 0.167068 | 0.040337 | 0.729982 | 0.062613 |
| 2004 | 394 | 119 | 1289 | 136 | 1938 | 0.203302 | 0.061404 | 0.665119 | 0.070175 |
# Long-format table with 'continent', 'year' and 'count' columns; the first
# CSV column is used as the row index.
meteorites = pd.read_csv(
    r"../Data/MeteoritesByContinent.csv",
    index_col=0,
)
meteorites.head()
| | continent | year | count |
|---|---|---|---|
| 0 | Africa | 2000 | 239 |
| 1 | Africa | 2001 | 87 |
| 2 | Africa | 2002 | 109 |
| 3 | Africa | 2003 | 30 |
| 4 | Africa | 2004 | 17 |
# Distinct continent labels, in order of first appearance in the table.
continents = meteorites['continent'].unique().tolist()
continents
['Africa', 'Antarctica', 'Asia', 'Australia', 'Europe', 'North America', 'South America']
# Reshape from long to wide: one row per year, one column per continent,
# cells holding the 'count' value. Year/continent pairs that are absent from
# the input come out as NaN.
meteorites = meteorites.pivot(
    index='year',
    columns='continent',
    values='count',
)
meteorites.head()
| continent | Africa | Antarctica | Asia | Australia | Europe | North America | South America |
|---|---|---|---|---|---|---|---|
| year | |||||||
| 2000 | 239.0 | 806.0 | 389.0 | NaN | 15.0 | 46.0 | 4.0 |
| 2001 | 87.0 | 499.0 | 636.0 | NaN | 1.0 | 50.0 | 5.0 |
| 2002 | 109.0 | 586.0 | 281.0 | 2.0 | 10.0 | 37.0 | 5.0 |
| 2003 | 30.0 | 1422.0 | 207.0 | 1.0 | 7.0 | 32.0 | 14.0 |
| 2004 | 17.0 | 30.0 | 155.0 | 1.0 | 5.0 | 53.0 | 3.0 |
# Replace the NaN cells left by the pivot with 0.
meteorites = meteorites.fillna(0)
meteorites.head()
| continent | Africa | Antarctica | Asia | Australia | Europe | North America | South America |
|---|---|---|---|---|---|---|---|
| year | |||||||
| 2000 | 239.0 | 806.0 | 389.0 | 0.0 | 15.0 | 46.0 | 4.0 |
| 2001 | 87.0 | 499.0 | 636.0 | 0.0 | 1.0 | 50.0 | 5.0 |
| 2002 | 109.0 | 586.0 | 281.0 | 2.0 | 10.0 | 37.0 | 5.0 |
| 2003 | 30.0 | 1422.0 | 207.0 | 1.0 | 7.0 | 32.0 | 14.0 |
| 2004 | 17.0 | 30.0 | 155.0 | 1.0 | 5.0 | 53.0 | 3.0 |
# Row total over the continent columns only. Summing the whole frame would
# also add 'total' and any '_pc' columns whenever this cell is re-run.
meteorites['total'] = meteorites[continents].sum(axis=1)
meteorites.head()
| continent | Africa | Antarctica | Asia | Australia | Europe | North America | South America | total |
|---|---|---|---|---|---|---|---|---|
| year | ||||||||
| 2000 | 239.0 | 806.0 | 389.0 | 0.0 | 15.0 | 46.0 | 4.0 | 1499.0 |
| 2001 | 87.0 | 499.0 | 636.0 | 0.0 | 1.0 | 50.0 | 5.0 | 1278.0 |
| 2002 | 109.0 | 586.0 | 281.0 | 2.0 | 10.0 | 37.0 | 5.0 | 1030.0 |
| 2003 | 30.0 | 1422.0 | 207.0 | 1.0 | 7.0 | 32.0 | 14.0 | 1713.0 |
| 2004 | 17.0 | 30.0 | 155.0 | 1.0 | 5.0 | 53.0 | 3.0 | 264.0 |
# Add one '<continent>_pc' column per continent: that continent's count
# divided by the row total.
for c in continents:
    meteorites["{}_pc".format(c)] = meteorites[c] / meteorites['total']

meteorites.head()
| continent | Africa | Antarctica | Asia | Australia | Europe | North America | South America | total | Africa_pc | Antarctica_pc | Asia_pc | Australia_pc | Europe_pc | North America_pc | South America_pc |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | |||||||||||||||
| 2000 | 239.0 | 806.0 | 389.0 | 0.0 | 15.0 | 46.0 | 4.0 | 1499.0 | 0.159440 | 0.537692 | 0.259506 | 0.000000 | 0.010007 | 0.030687 | 0.002668 |
| 2001 | 87.0 | 499.0 | 636.0 | 0.0 | 1.0 | 50.0 | 5.0 | 1278.0 | 0.068075 | 0.390454 | 0.497653 | 0.000000 | 0.000782 | 0.039124 | 0.003912 |
| 2002 | 109.0 | 586.0 | 281.0 | 2.0 | 10.0 | 37.0 | 5.0 | 1030.0 | 0.105825 | 0.568932 | 0.272816 | 0.001942 | 0.009709 | 0.035922 | 0.004854 |
| 2003 | 30.0 | 1422.0 | 207.0 | 1.0 | 7.0 | 32.0 | 14.0 | 1713.0 | 0.017513 | 0.830123 | 0.120841 | 0.000584 | 0.004086 | 0.018681 | 0.008173 |
| 2004 | 17.0 | 30.0 | 155.0 | 1.0 | 5.0 | 53.0 | 3.0 | 264.0 | 0.064394 | 0.113636 | 0.587121 | 0.003788 | 0.018939 | 0.200758 | 0.011364 |
# Names of the per-continent share columns, ordered by their summed share
# over all rows (largest first); traces are emitted in this order.
pcContinents = [c + "_pc" for c in continents]
sortKeys = dict(meteorites[pcContinents].sum())
pcContinents = sorted(pcContinents, key=lambda k: sortKeys[k], reverse=True)

# One semi-transparent bar trace per continent. The legend name is the column
# name with its trailing "_pc" stripped.
traces = [
    {
        'type': 'bar',
        'name': c[:-3],
        'x': meteorites.index,
        'y': meteorites[c],
        'opacity': 0.7,
    }
    for c in pcContinents
]

# Stacked bars with percentage-formatted y ticks and hover values, plus a
# small grey source note positioned in paper coordinates below the plot area.
layout = {
    'title': "Proportion of Meteorites Found by Continent, 2000-2012",
    'barmode': 'stack',
    'xaxis': {'title': 'Year'},
    'yaxis': {
        'title': 'Proportion of meteorites',
        'tickformat': '%',
        'hoverformat': '%',
    },
    'annotations': [
        {
            'text': '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
            'font': {'color': 'grey', 'size': 10},
            'xref': 'paper',
            'yref': 'paper',
            'x': 0,
            'y': -0.2,
            'showarrow': False,
        },
    ],
}

fig = {'data': traces, 'layout': layout}
pyo.iplot(fig)
# Overlay line: the 'less than 101g_pc' share from the weight-band table,
# plotted against that table's index.
smallShareTrace = {
    'type': 'scatter',
    'mode': 'lines+markers',
    'x': sizes.index,
    'y': sizes['less than 101g_pc'],
    'marker': {'color': '#333'},
    'name': 'Meteorite < 101g',
}

# Rebuild the trace list and the figure rather than appending in place: this
# drops any copy of the line left behind by an earlier run of this cell and
# no longer relies on `fig['data']` being the same list object as `traces`.
traces = [t for t in traces if t.get('name') != smallShareTrace['name']]
traces.append(smallShareTrace)
fig = {'data': traces, 'layout': layout}
pyo.iplot(fig)